setwd("")

library(foreign)
library(reshape)
library(stringr)
library(plyr)
library(dplyr)
library(countrycode)
library(data.table)


# Build Party Position from EB Trendfile #
##########################################

rm(list = ls())

# Select relevant variables from EB Trendfile.
# (One-off preprocessing step, kept for reference; its output is loaded below.)

# eb <- read.dta("./rawdata/ZA3521_v2-0-1.dta")
# eb <- select(eb, study_id, year, nation1, feelclo, lastvote, lrs, voteint)
# save(eb, file="./usedata/ebfiltered.Rdata")

# Load all data
load("./usedata/ebfiltered.Rdata")  # expected to provide `eb`
linkfile <- read.dta("./rawdata/link file for parties in EES.dta")
linkfile <- as.data.frame(linkfile)
linkfile$country <- mapvalues(linkfile$country, c("britain", "northern ireland"), c("united kingdom", "united kingdom"))
linkfile <- select(linkfile, country, prty_code, zeus_eb_trendfile)
linkfile <- rename(linkfile, zeus = zeus_eb_trendfile, cmp_id = prty_code)
linkfile <- filter(linkfile, !duplicated(linkfile))

# idiosyncratic corrections to have unique set of cmp_ids
# (the listed zeus codes are blanked for these parties and the rows dropped)
linkfile$zeus[linkfile$cmp_id == 21421 & linkfile$zeus == 306] <- NA # 21421 is PVV Party of Liberty and Progress
linkfile$zeus[linkfile$cmp_id == 32220 & linkfile$zeus == 103] <- NA # PCI Communist Party
linkfile$zeus[linkfile$cmp_id == 32520 & linkfile$zeus == 401] <- NA # DC Christian Democrats
linkfile$zeus[linkfile$cmp_id == 51320 & linkfile$zeus == 202] <- NA # Labour Party
linkfile <- filter(linkfile, !is.na(zeus))

# Recoding some variables.
# Left-right self-placement: the labelled end points become "1" and "10",
# non-answers become NA and are dropped.
eb$lrs <- mapvalues(eb$lrs, c("left", "right", "DK,NA", "inap", "refused"), c("1", "10", NA, NA, NA))
eb <- filter(eb, !is.na(lrs))

# Harmonise country names with the link file (lower case, merged UK / Germany).
eb$nation1 <- tolower(eb$nation1)
eb$nation1 <- mapvalues(eb$nation1, c("great britain", "northern ireland", "germany-west", "germany-east"), c("united kingdom", "united kingdom", "germany", "germany"))

# Party codes above 992 are treated as missing.
eb <- mutate(eb,
             feelclo = ifelse(feelclo > 992, NA, feelclo),
             voteint = ifelse(voteint > 992, NA, voteint))

eb$voteint <- as.numeric(eb$voteint)
eb$feelclo <- as.numeric(eb$feelclo)
# Convert via character: as.numeric() on a factor returns the internal level
# codes rather than the labels, which only coincides with the 1-10 scale when
# the levels happen to be stored in scale order.
eb$lrs <- as.numeric(as.character(eb$lrs))

# Aggregate by int. vote choice and 'feel-close':
# mean left-right self-placement per country, year and party code.
eb_lrs_agg <- eb %>%
  filter(!is.na(voteint)) %>%
  group_by(nation1, year, voteint) %>%
  summarize(eblrs = mean(lrs)) %>%
  rename(zeus = voteint, country = nation1)

eb_feelclo_agg <- eb %>%
  filter(!is.na(feelclo)) %>%
  group_by(nation1, year, feelclo) %>%
  summarize(eblrs = mean(lrs)) %>%
  rename(zeus = feelclo, country = nation1)


# Merge: translate zeus party codes into cmp_ids (inner join, unmatched codes are dropped)
eb_lrs_agg <- merge(eb_lrs_agg, linkfile, by = c("country", "zeus"), all.x = FALSE, all.y = FALSE)
eb_lrs_agg <- select(eb_lrs_agg, -zeus)

eb_feelclo_agg <- merge(eb_feelclo_agg, linkfile, by = c("country", "zeus"))
eb_feelclo_agg <- select(eb_feelclo_agg, -zeus)

eb_feelclo_agg <- rename(eb_feelclo_agg, ebfeellrs = eblrs)

# Full outer join of both measures on their shared columns.
eb_agg <- merge(eb_lrs_agg, eb_feelclo_agg, all = TRUE)

save(eb_agg, file="./usedata/eb_agg.Rdata")




# Building wordfish 1.0/2.0 #
##############################

rm(list = ls())

# Expected to provide the data frames `germany`, `ireland`, `netherlands`
# and `sweden` that are used below.
load("./rawdata/cmp_merge.rda")

# Election year = last four characters of the document name.
# Returned as numeric so that `year` has the same type for every country;
# otherwise rbind() below would coerce the whole column to character, which
# can leak into `partypos` and break the numeric year arithmetic later on.
year_from_name <- function(doc_names) {
  doc_names <- as.character(doc_names)
  as.numeric(substr(doc_names, nchar(doc_names) - 4 + 1, nchar(doc_names)))
}

# Recodes the German Green Party and The Left to the correct CMP IDs
germany <- rename(germany, cmp_id = cmp.names)
germany$country <- "germany"
germany <- select(germany, country, year, cmp_id, theta.het, theta.wf)
germany$year <- as.numeric(as.character(germany$year))
germany$cmp_id[germany$cmp_id == 41111 & germany$year == 1990] <- 41112
germany$cmp_id[germany$cmp_id == 41111 & germany$year >= 1994] <- 41113
germany$cmp_id[germany$cmp_id == 41221 & germany$year == 2005] <- 41222
# Flip the sign of both scales.
germany$theta.het <- germany$theta.het * -1
germany$theta.wf <- germany$theta.wf * -1

# For the remaining countries the CMP id is characters 2-6 of `cmp.names`.
ireland <- rename(ireland, cmp_id = cmp.names)
ireland$cmp_id <- substr(ireland$cmp_id, 2, 6)
ireland$year <- year_from_name(ireland$eng.names)
ireland$country <- "ireland"
ireland <- select(ireland, country, year, cmp_id, theta.het, theta.wf)

# Note: Remove the 2006 election since CMP data end 2003 for Netherlands
netherlands <- rename(netherlands, cmp_id = cmp.names)
netherlands$cmp_id <- substr(netherlands$cmp_id, 2, 6)
netherlands$year <- year_from_name(netherlands$eng.names)
netherlands$country <- "netherlands"
netherlands <- filter(netherlands, year != 2006)
netherlands <- select(netherlands, country, year, cmp_id, theta.het, theta.wf)
# Flip the sign of both scales.
netherlands$theta.het <- netherlands$theta.het * -1
netherlands$theta.wf <- netherlands$theta.wf * -1

# Note: Corrects a typo in the source data. Documents labelled 2005 are
# recoded to 2006, the year of the Swedish general election.
sweden <- rename(sweden, cmp_id = cmp.names)
sweden$cmp_id <- substr(sweden$cmp_id, 2, 6)
sweden$year <- year_from_name(sweden$eng.names)
sweden$year[sweden$year == 2005] <- 2006
sweden$country <- "sweden"
sweden <- select(sweden, country, year, cmp_id, theta.het, theta.wf)

# Stack all countries. `cmp_id` is left as produced above (substr() yields
# character for three of the four countries, so the stacked column is character).
wordfish <- rbind(germany, ireland, netherlands, sweden)
rownames(wordfish) <- NULL

save(wordfish, file="./usedata/wordfish.Rdata")



# Building Wordscore #
######################

rm(list = ls())

# CMP data: keep the identifying columns plus the CMP party code and parse
# the election date (month/day/year) into a Date.
cmp <- read.csv("./rawdata/MPDataset_full2010b.csv", sep=',') %>%
  select(countryname, partyname, edate, rile, party) %>%
  mutate(edate = as.Date(edate, "%m/%d/%Y"))

# Wordscores data: attach the CMP party code by joining on every column the
# two tables share, then keep and rename the output columns.
ws <- read.dta("./rawdata/pp_dataset.dta") %>%
  select(countryname, country, partyname, year, edate, rile, bl_trsc) %>%
  merge(cmp, by = c("countryname", "partyname", "edate", "rile")) %>%
  select(cmp_id = party, year, country, wordsc = bl_trsc)

# Harmonise the country label with the other sources.
ws$country <- as.character(ws$country)
ws$country[which(ws$country == "great britain")] <- "united kingdom"

save(ws, file="./usedata/wordscores.Rdata")



# Building Benoit Laver #
#########################

rm(list = ls())

# we fixed the encoding in the dta file and exported it as a UTF-8-encoded csv file
# the merging will not work properly with the dta file, because of the non-ASCII party names
ppmd <- read.csv("./rawdata/PPMD_summary_data.csv", stringsAsFactors=FALSE)
ppmd <- filter(ppmd, Dimension=="Left-Right", Scale=="Position")
ppmd <- select(ppmd, Country, Party, Mean, Election_Date)
ppmd <- rename(ppmd, year=Election_Date, bllrs=Mean)

# Two-letter country codes used in the PPMD file -> country names.
# Rows with any other code are dropped.
country_names <- c(
  AT = "Austria",     BE = "Belgium",   CY = "Cyprus",     CZ = "Czech Republic",
  DE = "Germany",     DK = "Denmark",   EE = "Estonia",    FI = "Finland",
  FR = "France",      GR = "Greece",    HU = "Hungary",    IE = "Ireland",
  IT = "Italy",       LT = "Lithuania", LU = "Luxembourg", LV = "Latvia",
  MT = "Malta",       NL = "Netherlands", PT = "Portugal", PL = "Poland",
  SE = "Sweden",      SI = "Slovenia",  SK = "Slovakia",   ES = "Spain",
  UK = "United Kingdom"
)
ppmd$countryname <- unname(country_names[ppmd$Country])
ppmd <- filter(ppmd, !is.na(countryname))
ppmd$countryname <- tolower(ppmd$countryname)

# Link file: PPMD party label -> CMP party code, restricted to the same countries.
linkfile <- read.dta("./rawdata/link file for parties in EES.dta")
linkfile <- as.data.frame(linkfile)
linkfile$country <- mapvalues(linkfile$country, c("britain"), c("united kingdom"))
linkfile <- linkfile[linkfile$country %in% tolower(country_names), ]
linkfile <- select(linkfile, country, prty_code, ppmd)
linkfile <- filter(linkfile, ppmd != "")
linkfile <- rename(linkfile, cmp_id = prty_code)


# Left join: keep every PPMD row, attach cmp_id where the party label matches.
ppmd <- merge(ppmd, linkfile,
              by.x = c("countryname", "Party"), by.y = c("country", "ppmd"),
              all.x = TRUE, all.y = FALSE)
# checked manually those that didn't match: ppmd[is.na(ppmd$cmp_id),]
# fill in some manually:
# Malta: the two parties need distinct ids. 54620 stays with NP; MLP gets 54320.
# Previously both were assigned 54620, collapsing two parties into one id.
# NOTE(review): confirm 54320 against the CMP party list.
ppmd$cmp_id[ppmd$countryname=="malta" & ppmd$Party == "MLP"] <- 54320
ppmd$cmp_id[ppmd$countryname=="malta" & ppmd$Party == "NP"] <- 54620
ppmd$cmp_id[ppmd$countryname=="germany" & ppmd$Party == "CDU/CSU"] <- 41521	# CDU/C
ppmd$cmp_id[ppmd$countryname=="germany" & ppmd$Party == "GRÜ"] <- 41113	# GRU
ppmd$cmp_id[ppmd$countryname=="estonia" & ppmd$Party == "EÜRP"] <- 83952	# EÜRP
ppmd$cmp_id[ppmd$countryname=="estonia" & ppmd$Party == "Mõõd"] <- 83410	# Mood
ppmd$cmp_id[ppmd$countryname=="hungary" & ppmd$Party == "MIÉP"] <- 86620	# MIEP
ppmd$cmp_id[ppmd$countryname=="hungary" & ppmd$Party == "MUNKÁS"] <- 86210	# MUNKAS

# Only score, year and cmp_id are needed downstream.
ppmd <- select(ppmd, -Country, -countryname, -Party)
save(ppmd, file="./usedata/ppmd.Rdata")



# Building Somer-Topcu / Bawn #
###############################

rm(list = ls())

# Return the last `n` characters of each element of `x`
# (the whole string when it has fewer than `n` characters).
substrRight <- function(x, n) {
  last <- nchar(x)
  first <- last - n + 1
  substr(x, first, last)
}

# Read the Bawn / Somer-Topcu data (semicolon-separated, decimal comma).
# The election date ends in a two-digit year: values below 10 are placed in
# the 2000s, everything else in the 1900s.
bst <- read.csv("./rawdata/AJPS_Bawn_Somer-Topcu_data1.csv",
                sep=";", dec=",", stringsAsFactors=FALSE) %>%
  mutate(year_1 = as.numeric(substrRight(edate, 2)),
         year_2 = ifelse(year_1 < 10, 2000, 1900),
         year = year_2 + year_1) %>%
  select(partycode, year, bstlr = party_position_supp)

save(bst, file="./usedata/bst.Rdata")



# Building Castles / Mair 1984 #
################################

rm(list = ls())

# Castles/Mair scores; the lookup key is CountryID followed by the
# two-character, zero-padded PartyID.
cm <- read.csv("./rawdata/CastlesMair1984.csv", sep=";", dec=",", stringsAsFactors=FALSE)
cm$castles_mair <- paste0(cm$CountryID, str_pad(cm$PartyID, 2, pad = "0"))

# Party table: rows that carry a Castles/Mair key, reduced to the needed columns.
viewparty <- read.csv("./rawdata/view_party.csv", sep=",", stringsAsFactors=FALSE)
viewparty <- viewparty[!is.na(viewparty$castles_mair),
                       c("party_name_short", "cmp", "castles_mair")]

# Keep every party-table row, stamp the survey year, then drop rows that
# lack a score or a CMP id.
cm <- merge(cm, viewparty, by="castles_mair", all.x=FALSE, all.y=TRUE) %>%
  mutate(year = 1984) %>%
  select(cmlrs = Score, cmp_id = cmp, year) %>%
  na.omit()

save(cm, file="./usedata/cm.Rdata")



# Building Huber-Inglehart #
###########################

rm(list = ls())

# Huber/Inglehart positions; the lookup key is the table number followed by
# the two-character, zero-padded within-table number.
hi <- read.csv("./rawdata/HuberInglehart.csv", sep=",",  stringsAsFactors=FALSE) %>%
  filter(!is.na(tabnr)) %>%
  mutate(huber_inglehart = paste0(tabnr, str_pad(tabnr2, 2, pad = "0"))) %>%
  select(position, huber_inglehart)

# Party table: rows that carry a Huber/Inglehart key, reduced to the needed columns.
viewparty <- read.csv("./rawdata/view_party.csv", sep=",", stringsAsFactors=FALSE)
viewparty <- viewparty[!is.na(viewparty$huber_inglehart),
                       c("country_name", "party_name_short", "cmp", "huber_inglehart")]

# Correct coding for Austria: overwrite the key for five CMP ids.
austria_cmp <- c(42420, 42110, 42421, 42520, 42320)
austria_key <- c(305, 301, 304, 303, 302)
for (k in seq_along(austria_cmp)) {
  viewparty$huber_inglehart <- ifelse(viewparty$cmp == austria_cmp[k],
                                      austria_key[k],
                                      viewparty$huber_inglehart)
}

# Inner join, stamp the survey year, then drop incomplete rows.
hi <- merge(hi, viewparty, by="huber_inglehart", all.x=FALSE, all.y=FALSE) %>%
  mutate(year = 1993) %>%
  select(hilrs = position, cmp_id = cmp, year) %>%
  na.omit()

save(hi, file="./usedata/hi.Rdata")











# Merge everything into MCSS #
##############################

rm(list = ls())


# Base table: MCSS entries of study 1, with harmonised column names and
# lower-case country names. Every other source is merged onto this table.
load("./rawdata/mcss.Rdata")

mcss <- mcss %>%
  filter(study == 1) %>%
  select(country = countryname, year, mcsslrs = lr.mu.m1, rile, cmp_id = party) %>%
  mutate(country = tolower(country))

partypos <- mcss



# Merge Benoit Laver #
######################

# Left join on party and year.
load("./usedata/ppmd.Rdata")

partypos <- merge(partypos, ppmd, by = c("cmp_id", "year"), all.x = TRUE, all.y = FALSE)


# Merge Wordfish Scores #
#########################

# File name must match the one written by the Wordfish section exactly
# ("wordfish.Rdata"); a different capitalisation fails on case-sensitive
# file systems.
load("./usedata/wordfish.Rdata")

# Full outer join: Wordfish rows without an MCSS counterpart are kept.
partypos <- merge(partypos, wordfish, by = c("cmp_id", "year", "country"), all = TRUE)


# Merge Wordscore Scores #
##########################

load("./usedata/wordscores.Rdata")
partypos <- merge(partypos, ws, by = c("cmp_id", "year", "country"), all.x = TRUE, all.y = FALSE)



# Merge Bawn / Somer-Topcu #
############################

# Note: not all parties in bst are in cmp! (that is why: all.y=FALSE)

load("./usedata/bst.Rdata")

partypos <- merge(partypos, bst, by.x = c("cmp_id", "year"), by.y = c("partycode", "year"),
                  all.x = TRUE, all.y = FALSE)

# Intermediate snapshot, written to the working directory.
save(partypos, file="partypos.Rdata")



# Merge EB Positions #
######################

load("./usedata/eb_agg.Rdata")

# Both tables are keyed on (cmp_id, country, year); cmp_id is compared as text.
partypos$cmp_id <- as.character(partypos$cmp_id)
eb_agg$cmp_id <- as.character(eb_agg$cmp_id)
partypos <- data.table(partypos, key = c("cmp_id", "country", "year"))
eb_agg <- data.table(eb_agg, key = c("cmp_id", "country", "year"))

# Keep the EB year and a row id, so that each matched row still shows
# which EB entry it came from.
eb_agg$year_eb <- eb_agg$year
eb_agg$id_eb <- seq_len(nrow(eb_agg))

# exact match for (cmp_id, country) but nearest for (year)
tmp <- eb_agg[partypos, roll = "nearest"]
tmp <- mutate(tmp, year_diff_eb = year - year_eb)

# The merging results in double-usage of values from eb_agg, so every
# eb_agg entry is kept for one election only.
by_eb <- group_by(tmp, id_eb)
on_or_after <- filter(by_eb, year_diff_eb >= 0)

# 'good-matches' (eb_agg entries matched with closest cmp
#	election occurring either in the eb_agg's year or later)
g1 <- filter(on_or_after, year_diff_eb == min(year_diff_eb))

# 'bad-matches' (which is the complement set to g1)
# 	(g4: no match, g3: eb after cmp entry,
# 	g2: there is a closer match)
g2 <- filter(on_or_after, year_diff_eb != min(year_diff_eb))
g3 <- filter(by_eb, !(year_diff_eb >= 0))
g4 <- filter(by_eb, is.na(year_diff_eb))

# Bad matches keep their partypos row but lose all EB information.
g0 <- bind_rows(g2, g3, g4) %>%
  ungroup() %>%
  mutate(eblrs = NA, ebfeellrs = NA, year_eb = NA, year_diff_eb = NA, id_eb = NA)

partypos <- bind_rows(g1, g0) %>%
  ungroup() %>%
  select(-id_eb, -year_eb)






# Merge Castles-Mair 1984 #
###########################

load("./usedata/cm.Rdata")

# Both tables are keyed on (cmp_id, year); cmp_id is compared as text.
cm$cmp_id <- as.character(cm$cmp_id)
partypos <- data.table(partypos, key = c("cmp_id", "year"))
cm <- data.table(cm, key = c("cmp_id", "year"))

# Keep the survey year and a row id, so that each matched row still shows
# which Castles/Mair entry it came from.
cm$year_cm <- cm$year
cm$id_cm <- seq_len(nrow(cm))

# exact match for (cmp_id) but nearest for (year)
tmp <- cm[partypos, roll = "nearest"]
tmp <- mutate(tmp, year_diff_cm = year - year_cm)

# Every cm entry is kept for one election only: the closest one taking place
# in the survey year or later (g1). All other rows (g2: a closer election
# exists, g3: survey after the election, g4: no match) keep their partypos
# data but lose the Castles/Mair information.
by_cm <- group_by(tmp, id_cm)
on_or_after <- filter(by_cm, year_diff_cm >= 0)

g1 <- filter(on_or_after, year_diff_cm == min(year_diff_cm))

g2 <- filter(on_or_after, year_diff_cm != min(year_diff_cm))
g3 <- filter(by_cm, !(year_diff_cm >= 0))
g4 <- filter(by_cm, is.na(year_diff_cm))
g0 <- bind_rows(g2, g3, g4) %>%
  ungroup() %>%
  mutate(cmlrs = NA, year_cm = NA, year_diff_cm = NA, id_cm = NA)

partypos <- bind_rows(g1, g0) %>%
  ungroup() %>%
  select(-id_cm, -year_cm)






# Merge Huber-Inglehart 1995 #
##############################

load("./usedata/hi.Rdata")

# Both tables are keyed on (cmp_id, year); cmp_id is compared as text.
hi$cmp_id <- as.character(hi$cmp_id)
partypos <- data.table(partypos, key = c("cmp_id", "year"))
hi <- data.table(hi, key = c("cmp_id", "year"))

# Keep the survey year and a row id, so that each matched row still shows
# which Huber/Inglehart entry it came from.
hi$year_hi <- hi$year
hi$id_hi <- seq_len(nrow(hi))

# exact match for (cmp_id) but nearest for (year)
tmp <- hi[partypos, roll = "nearest"]
tmp <- mutate(tmp, year_diff_hi = year - year_hi)

# Every hi entry is kept for one election only: the closest one taking place
# in the survey year or later (g1). All other rows (g2: a closer election
# exists, g3: survey after the election, g4: no match) keep their partypos
# data but lose the Huber/Inglehart information.
by_hi <- group_by(tmp, id_hi)
on_or_after <- filter(by_hi, year_diff_hi >= 0)

g1 <- filter(on_or_after, year_diff_hi == min(year_diff_hi))

g2 <- filter(on_or_after, year_diff_hi != min(year_diff_hi))
g3 <- filter(by_hi, !(year_diff_hi >= 0))
g4 <- filter(by_hi, is.na(year_diff_hi))
g0 <- bind_rows(g2, g3, g4) %>%
  ungroup() %>%
  mutate(hilrs = NA, year_hi = NA, year_diff_hi = NA, id_hi = NA)

partypos <- bind_rows(g1, g0) %>%
  ungroup() %>%
  select(-id_hi, -year_hi)

# Final column order: identifiers, rile, all left-right measures (names
# containing 'lr'), Wordfish thetas, Wordscores, and the year distances.
partypos <- select(partypos, country, cmp_id, year, rile, contains('lr'), contains("theta"), contains("wordsc"), contains("year_diff"))


# GET IT OUT #
##############

save(partypos, file="MannheimPartyPositions.Rdata")



